import pandas as pd
import os

def prepare_weibo_data(
    data_path: str = 'data/weibo_senti_100k.csv',
    output_dir: str = 'data',
    train_ratio: float = 0.8,
    shuffle: bool = True,
    random_state: int = 42,
) -> None:
    """Split a CSV dataset into a training file and a test file.

    The CSV at ``data_path`` is read with pandas, optionally shuffled, and
    cut into two consecutive parts: the first ``int(len(df) * train_ratio)``
    rows become the training set and the remaining rows the test set. The
    parts are written to ``weibo_train.csv`` and ``weibo_test.csv`` inside
    ``output_dir`` (without the index column), and progress messages are
    printed to stdout.

    Args:
        data_path: Path of the UTF-8 encoded source CSV file.
        output_dir: Directory that receives the two output files; it is
            created if it does not exist.
        train_ratio: Fraction of rows placed in the training set. Must be
            strictly between 0 and 1.
        shuffle: Whether to shuffle the rows before splitting. Pass
            ``False`` to get a purely sequential split.
        random_state: Seed used for shuffling so that repeated runs produce
            the same split. Ignored when ``shuffle`` is ``False``.

    Raises:
        ValueError: If ``train_ratio`` is not strictly between 0 and 1.
    """
    if not 0.0 < train_ratio < 1.0:
        raise ValueError(
            f"train_ratio must be strictly between 0 and 1, got {train_ratio!r}"
        )

    # Read the local data file.
    print("正在读取数据...")
    df = pd.read_csv(data_path, encoding='utf-8')

    if shuffle:
        # A sequential cut of a file whose rows are grouped by label leaves
        # the test set dominated by a single class, so mix the rows first.
        # NOTE(review): confirm whether the source CSV is ordered by label.
        df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Split into training and test sets.
    train_size = int(len(df) * train_ratio)
    train_df = df.iloc[:train_size]
    test_df = df.iloc[train_size:]

    # Save the processed data.
    os.makedirs(output_dir, exist_ok=True)
    train_df.to_csv(
        os.path.join(output_dir, 'weibo_train.csv'), index=False, encoding='utf-8'
    )
    test_df.to_csv(
        os.path.join(output_dir, 'weibo_test.csv'), index=False, encoding='utf-8'
    )

    print("数据集准备完成！")
    print(f"训练集大小: {len(train_df)} 条数据")
    print(f"测试集大小: {len(test_df)} 条数据")

# Run the dataset preparation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    prepare_weibo_data()